******************************************************************************************
* Do-file name:	cr_unempl_data.do                
* Task:         Create dataset of unemployed observations 
* Last change:  27.02.2019                                                               
******************************************************************************************



******************************************************************************************
*** program setup
******************************************************************************************

* interpret every command below under Stata 14.2 rules
version 14.2

* start from an empty session: data and stored objects, all macros, and
* automatically loaded programs
clear all
macro drop _all
discard

* output and session settings
set more off
set linesize 90
set seed 123456789
* set trace on			// uncomment to debug



******************************************************************************************
*** set macros
******************************************************************************************

* first and last yearly spell file to process
global start_year "1984"		// 1984 is the first year available
global end_year   "2000"


******************************************************************************************
*** load and clean dataset
******************************************************************************************

** Loop over the yearly spell files from $start_year to $end_year, clean each
** one, and pool them into data/unempl_all.dta with one row per
** vsnr_ano / leh_beg_dat / leh_end_dat combination.
forvalues x = $start_year / $end_year {

	* forward slashes only: Stata accepts them on every platform, whereas a
	* backslash is Windows-specific and can interfere with macro expansion
	use "data/orig/unemp-spells`x'.dta", clear

	** clean data
	* spells without a begin or end date cannot be placed in time;
	* missing() also catches extended missing values (.a-.z)
	drop if missing(leh_beg_dat, leh_end_dat)
	* keep the first row of each spell key (same command as used after the
	* append below); row order is re-established by the later sorts
	duplicates drop vsnr_ano leh_beg_dat leh_end_dat, force
	compress

	** append yearly data to one dataset
	if `x' == $start_year {
		* first year: start the pooled file
		save "data/unempl_all.dta", replace
	}
	else {
		* later years: add the pooled file to the current year and remove
		* spells that appear in more than one yearly file
		append using "data/unempl_all.dta"
		sort vsnr_ano leh_beg_dat leh_end_dat
		duplicates drop vsnr_ano leh_beg_dat leh_end_dat, force
		save "data/unempl_all.dta", replace
	}
}


******************************************************************************************
*** create one observation per year and vsnr
******************************************************************************************

** read the pooled spell file
use "data/unempl_all.dta", clear

** number each person's spells in date order and stack begin/end date into one
** variable: zeitpunkt == 1 is the begin-date row, zeitpunkt == 2 the end-date row
clonevar date1 = leh_beg_dat
clonevar date2 = leh_end_dat
bysort vsnr_ano (date1 date2): generate spell_gen = _n
reshape long date, i(vsnr_ano spell_gen) j(zeitpunkt)

** build one row for every calendar year the spell touches
tempvar span copy shift
generate year = year(date)
* on the begin-date row: end year minus begin year (missing on the end-date row)
bysort vsnr_ano spell_gen (zeitpunkt): generate `span' = year[_n+1] - year
* replicate the begin-date row so that, together with the end-date row, there
* is one row per year; spans below 2 leave the row unreplicated
expand `span' if zeitpunkt == 1, generate(`copy')

** the copies still carry the begin year: shift the k-th copy forward by k years
bysort vsnr_ano spell_gen zeitpunkt (`copy'): generate `shift' = _n - 1  if zeitpunkt == 1
replace year = year + `shift'  if `copy' == 1
drop date `span' `copy' `shift'

** a spell that begins and ends in the same year now has two identical rows
duplicates drop vsnr_ano spell_gen leh_beg_dat leh_end_dat year, force

** keep only person-years in which the spell covers the reference date (30 June)
generate stichtag = mdy(6, 30, year)
format stichtag %d
keep if stichtag >= leh_beg_dat & stichtag <= leh_end_dat
* several spells may cover the same reference date: keep one row per person-year
duplicates drop vsnr_ano year, force

** flag the remaining person-years as unemployed
generate status = 2


******************************************************************************************
*** drop variables
******************************************************************************************

* spell dates and construction helpers are no longer needed
drop leh_beg_dat leh_end_dat spell_gen zeitpunkt stichtag


******************************************************************************************
*** save new dataset
******************************************************************************************

** shrink storage types, document the dataset, and write the final file
compress
* build the period in the label from the year macros so it always matches the
* window that was actually processed
label data "yearly unemployment data based on unemp-spells (${start_year}-${end_year})"
notes: vsnr is classified as unemployed if unemployed at 30/06 in respective year
save "data/unempl_all_clean.dta", replace

** delete old data (intermediate pooled spell file)
erase "data/unempl_all.dta"


******************************************************************************************
*** end
******************************************************************************************

* stop here; the text below this line is never executed
exit


*========================================================================================*
Comments:
- unique identifier: vsnr_ano year
- assumption: vsnr is classified as unemployed if vsnr is unemployed at 30/06 of the
  respective year
